# Load necessary libraries
set.seed(123)
library(ggplot2)
library(splines)
library(minpack.lm)
#install.packages("pracma")
library(pracma)
#install.packages("nls2")
library(nls2)
## Loading required package: proto
Data Cleaning and Plotting
# Load and process the
library(readxl)
Data_McManus2_WT_P2L_AAVCremCherry <- read_excel("Data_McManus2_WT_P2L_AAVCremCherry.xlsx")
## New names:
## • `` -> `...1`
df_MC <- as.data.frame(Data_McManus2_WT_P2L_AAVCremCherry)
# Rename columns for clarity
colnames(df_MC) <- c("Time", "X1", "X2", "X3", "X4")
# Remove outliers where Time is between 100 and 100.1
df_MC <- df_MC[!(df_MC$Time >= 100 & df_MC$Time <= 100.1), ]
# Plot each species
ggplot(df_MC, aes(x = Time)) +
geom_line(aes(y = X1, color = "X1")) +
geom_line(aes(y = X2, color = "X2")) +
geom_line(aes(y = X3, color = "X3")) +
geom_line(aes(y = X4, color = "X4")) +
labs(title = "Data without Outliers", x = "Time", y = "X(t_i)") +
scale_color_manual(values = c("X1" = "blue", "X2" = "green", "X3" = "red", "X4" = "purple")) +
theme_minimal()
spline_X1 <- predict(smooth.spline(df_MC$Time, df_MC$X1), spar =)
spline_X2 <- predict(smooth.spline(df_MC$Time, df_MC$X2))
spline_X3 <- predict(smooth.spline(df_MC$Time, df_MC$X3))
spline_X4 <- predict(smooth.spline(df_MC$Time,df_MC$X4))
# Plot the spline-smoothed data
plot(df_MC$Time, spline_X1$y, type = "l", col = "red", lwd = 2,
xlab = "Time", ylab = "Values", main = " Splines",
ylim = range(c(spline_X1$y, spline_X2$y, spline_X3$y, spline_X4)))
lines(df_MC$Time, spline_X2$y, col = "blue", lwd = 2)
lines(df_MC$Time, spline_X3$y, col = "green", lwd = 2)
lines(df_MC$Time, spline_X4$y, col = "purple", lwd = 2)
legend("topright", legend = c("spline_X1", "spline_X2", "spline_X3", "spline_X4"),
col = c("red", "blue", "green", "purple"), lwd = 2)
# Plot the spline-smoothed data
spline_X1 <- predict(smooth.spline(df_MC$Time, df_MC$X1, spar=1)) #black line in 2nd #plot(average for X1)
plot(df_MC$Time, df_MC$X1, type = "l", col = "yellow", lwd = 2,
xlab = "Time", ylab = "Values", main = " Splines",
ylim = range(c(spline_X1$y, spline_X2$y, spline_X3$y, spline_X4)))
lines(df_MC$Time, spline_X1$y)
plot(df_MC$Time, df_MC$X1-spline_X1$y, type = "l", col = "yellow", lwd = 2,
xlab = "Time", ylab = "Values", main = " Detrended Splines")
x <- 1:length(df_MC$Time)
y <- df_MC$X1-spline_X1$y
maxima <- findpeaks(df_MC$X1-spline_X1$y, minpeakheight=0)
minima <- findpeaks(-df_MC$X1-spline_X1$y, minpeakheight=0)
x_max <- maxima[,2]
y_max <- maxima[,1]
fit <- nls(y_max ~ a * exp(-b * x_max) + c, start = list(a = max(y_max), b = 0.01, c = min(y_max)))
summary(fit)
##
## Formula: y_max ~ a * exp(-b * x_max) + c
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 1.115e+03 9.552e+00 116.69 <2e-16 ***
## b 5.000e-04 1.421e-05 35.18 <2e-16 ***
## c 2.142e+02 1.051e+01 20.37 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 21.65 on 98 degrees of freedom
##
## Number of iterations to convergence: 13
## Achieved convergence tolerance: 5.184e-06
a <- coef(fit)["a"]
b <- coef(fit)["b"]
c <- coef(fit)["c"]
plot(x, y, type="l", col="yellow", main="Detrended Splines with Exponential Fit")
lines(x_max, predict(fit, list(x_max = x_max)), col="red", lwd=2) #
Simulate Data Based on the Fitted Model
t <- seq(0, 100, by = 0.1)
model_function <- function(t, A, gamma, omega, phi, y_shift) {
A * exp(-gamma * t ) * cos(omega * t + phi) + y_shift
}
# using estimated parameters to def simulating function
simulate_data <- function(t, params, sigma) {
predicted <- model_function(t, params[1], params[2], params[3], params[4], params[5])
noise <- rnorm(length(t), mean = 0, sd = sigma) #using sigma0 to produce noise
simulated_data <- predicted + noise
return(simulated_data)
}
sigma0 = 10
param0=c(1000, 0.025, 0.9, 0,0)
simulated_data <- simulate_data(t, param0, sigma0 )
plot(t, simulated_data, type = "l", col = "blue", main = "Simulated Data", xlab = "Time", ylab = "Simulated Values")
true_params <- c(A = 1000, gamma = 0.025, omega = 0.9, phi = 0, y_shift = 0)
sigma0 <- 10
t <- seq(0, 100, by = 0.1)
set.seed(123)
y_sim <- model_function(t, true_params[1], true_params[2], true_params[3], true_params[4], true_params[5]) +
rnorm(length(t), mean = 0, sd = sigma0)
# default algorithm in nls: Gauss-Newton
fit_nls <- nls(
y_sim ~ model_function(t, A, gamma, omega, phi, y_shift),
start = list(A = 950, gamma = 0.03, omega = 0.85, phi = 0.1, y_shift = 0), #initial value close to true ones
data = data.frame(t = t, y_sim = y_sim)
)
#
summary(fit_nls)
##
## Formula: y_sim ~ model_function(t, A, gamma, omega, phi, y_shift)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## A 1.001e+03 1.447e+00 691.477 <2e-16 ***
## gamma 2.503e-02 5.468e-05 457.732 <2e-16 ***
## omega 9.000e-01 5.483e-05 16413.378 <2e-16 ***
## phi -2.465e-04 1.459e-03 -0.169 0.866
## y_shift 1.495e-01 3.144e-01 0.475 0.635
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.935 on 996 degrees of freedom
##
## Number of iterations to convergence: 9
## Achieved convergence tolerance: 7.293e-06
coef(fit_nls)
## A gamma omega phi y_shift
## 1.000598e+03 2.503082e-02 9.000001e-01 -2.464913e-04 1.494642e-01
##Task 2 find peaks
t <- seq(0, 100, by = 0.1)
# find peaks
peaks <- findpeaks(y_sim, minpeakheight = 100, minpeakdistance = 15)
# peaks[, 2] :index of peaks in (1~1000)
#t[ peaks[, 2]]: index of peaks in(1~100)
t_peaks <- t[peaks[, 2]]
y_peaks <- peaks[, 1]
#
plot(t, y_sim, type = "l", col = "blue", main = "Simulated Data with Peaks", xlab = "Time", ylab = "Simulated Values")
points(t_peaks, y_peaks, col = "red", pch = 19)
## Finding initial parameters:A and gamma for simulated data
# transform into log
log_y_peaks <- log(y_peaks)
# logy = -gamma*t + logA
fit_linear <- lm(log_y_peaks ~ t_peaks)
summary(fit_linear)
##
## Call:
## lm(formula = log_y_peaks ~ t_peaks)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.096938 -0.017879 -0.002984 0.026874 0.049729
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.876027 0.019252 357.16 <2e-16 ***
## t_peaks -0.022952 0.000335 -68.52 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.03923 on 13 degrees of freedom
## Multiple R-squared: 0.9972, Adjusted R-squared: 0.997
## F-statistic: 4694 on 1 and 13 DF, p-value: < 2.2e-16
print("coef:")
## [1] "coef:"
coef(fit_linear)
## (Intercept) t_peaks
## 6.87602738 -0.02295209
coef(fit_linear)[1]
## (Intercept)
## 6.876027
print("coef:")
## [1] "coef:"
# get log(A) and gamma
log_A <- coef(fit_linear)[1]
init_gamma <- -coef(fit_linear)[2] #Extract gamma from the linear fit
cat("log(A) =", log_A, "\n")
## log(A) = 6.876027
cat("gamma =", init_gamma, "\n")
## gamma = 0.02295209
init_A=exp(log_A)
print(init_A)
## (Intercept)
## 968.7701
#Plot the simulated data
plot(t, y_sim, type = "l", col = "blue", main = "Simulated Data with Fitted Exponential", xlab = "Time", ylab = "Simulated Values")
# Calculate the fitted exponential curve using estimated A and gamma from the linear model
init_A <- exp(log_A) # Extract log_A from the linear model
# Add the fitted exponential curve
curve(init_A * exp(-init_gamma * x), from = 0, to = max(t), col = "green", add = TRUE, lwd = 2)
# Add a legend
legend("topright", legend = c("Simulated Data", "Fitted Exponential"), col = c("blue", "green"), lty = 1)
# Extract the time points of the first two consecutive maxima from t_peaks
t_max1 <- t_peaks[1] # First maximum
t_max2 <- t_peaks[2] # Second maximum
t_max3 <- t_peaks[3]
t_max4 <- t_peaks[4]
# Calculate the time difference between two consecutive maxima
Delta <- t_max2 - t_max1
Delta
## [1] 6.7
print(t_max3-t_max2)
## [1] 6.9
print(t_max4 - t_max3)
## [1] 7
# Estimate omega (tau) using the formula 2*pi/Delta
init_omega <- 2 * pi / Delta
# Output the result
cat("initial omega (tau) =", init_omega, "\n")
## initial omega (tau) = 0.9377889
cat("Time difference between consecutive maxima (Delta) =", Delta, "\n")
## Time difference between consecutive maxima (Delta) = 6.7
we assume period delta keeps same. now our initial parameters are[A = 968.7701,gamma = 0.02295209 ,omega = 0.9377889 , phi = 0, y_shift = 0]
library(nlme)
fit_nls <- nls(
y_sim ~ model_function(t, A, gamma, omega, phi, y_shift),
start = list(A = init_A, gamma = init_gamma, omega = init_omega, phi = 0.1, y_shift = 0),
data = data.frame(t = t, y_sim = y_sim)
)
fit_nls
## Nonlinear regression model
## model: y_sim ~ model_function(t, A, gamma, omega, phi, y_shift)
## data: data.frame(t = t, y_sim = y_sim)
## A gamma omega phi y_shift
## 1.001e+03 2.503e-02 9.000e-01 -2.467e-04 1.495e-01
## residual sum-of-squares: 98312
##
## Number of iterations to convergence: 8
## Achieved convergence tolerance: 5.117e-06
The code below manually calculates the covariance matrix of a linear
regression model and verifies its consistency with the automatically
computed result from the lm function.
Specifically, it first generates data and constructs the design matrix \(X\). Then, it manually computes the regression coefficients using the formula \[ \hat{\beta} = (X^T X)^{-1} X^T y. \]
Next, the covariance matrix is calculated with the formula \[ \text{Cov}(\hat{\beta}) = (X^T X)^{-1} \cdot \frac{\sum \text{resid}^2}{\text{df.res}}, \] where \(\text{resid} = y - X\hat{\beta}\) represents the residuals and \(\text{df.res}\) is the degrees of freedom, used to adjust the error variance.
Finally, all.equal is used to compare the manually
computed covariance matrix with the result from vcov(mod),
confirming their consistency, with the result TRUE
indicating agreement.
library(data.table)
df = data.table(y = runif(100, 0, 100), #generates 100 random deviates. for 0<x,y,z<100
x = runif(100, 0, 100),
z = runif(100, 0, 100))
mod = lm(y ~ ., df)
X = cbind(1, as.matrix(df[, -1])) #only include x and z
invXtX = solve(crossprod(X)) # compute for inverse of crossproduct(X^t*X)
coef = invXtX %*% t(X) %*% df$y # compute coef manually: coef = (X^t*X)^(-1) *X^t*y
resid = df$y - X %*% coef # residual = real value - predict value
df.res = nrow(X) - ncol(X) #degree of freedom
manual = invXtX * sum(resid**2)/(df.res) #
funct = vcov(mod)
print(mod)
##
## Call:
## lm(formula = y ~ ., data = df)
##
## Coefficients:
## (Intercept) x z
## 58.23178 -0.10573 -0.06033
all.equal(manual, funct, check.attributes = F)
## [1] TRUE
#install.packages("nlstools")
library(nlstools)
##
## 'nlstools' has been loaded.
## IMPORTANT NOTICE: Most nonlinear regression models and data set examples
## related to predictive microbiolgy have been moved to the package 'nlsMicrobio'
library(stats)
simulate_and_estimate <- function(true_params, sigma0 = 10) {
#simulate data
t <- seq(0, 100, by = 0.1)
model_function <- function(t, A, gamma, omega, phi, y_shift) {
A * exp(-gamma * t) * cos(omega * t + phi) + y_shift
}
y_sim <- model_function(t, true_params[1], true_params[2], true_params[3], true_params[4], true_params[5]) +
rnorm(length(t), mean = 0, sd = sigma0)
# find initial value
peaks <- findpeaks(y_sim, minpeakheight = 100, minpeakdistance = 15)
t_peaks <- t[peaks[, 2]]
y_peaks <- peaks[, 1]
log_y_peaks <- log(y_peaks)
fit_linear <- lm(log_y_peaks ~ t_peaks)
log_A <- coef(fit_linear)[1]
init_gamma <- -coef(fit_linear)[2]
init_A <- exp(log_A)
Delta <- t_peaks[2] - t_peaks[1]
init_omega <- 2 * pi / Delta
# find estimated parameters
fit_nls <- try(nls(
y_sim ~ model_function(t, A, gamma, omega, phi, y_shift),
start = list(A = init_A, gamma = init_gamma, omega = init_omega, phi = 0.1, y_shift = 0),
data = data.frame(t = t, y_sim = y_sim)
), silent = TRUE)
if (class(fit_nls) == "try-error") {
return(NULL)
}
# find estmate parameters and standard errors
param_estimates <- coef(fit_nls)
# Calculate standard errors and confidence intervals
conf_intervals <- confint2(fit_nls, level = 0.99, method = 'asymptotic')
return(list(estimates = param_estimates, conf_intervals = conf_intervals))
}
# test function
true_params <- c(A = 1000, gamma = 0.025, omega = 0.9, phi = 0, y_shift = 0)
result <- simulate_and_estimate(true_params)
result
## $estimates
## A gamma omega phi y_shift
## 9.975672e+02 2.498159e-02 9.000614e-01 -3.702137e-04 1.260699e-01
##
## $conf_intervals
## 0.5 % 99.5 %
## A 993.855376026 1.001279e+03
## gamma 0.024841109 2.512208e-02
## omega 0.899920475 9.002023e-01
## phi -0.004124713 3.384285e-03
## y_shift -0.681001289 9.331410e-01
# simulate 100
n_simulations <- 100
results <- vector("list", n_simulations)
#
for (i in 1:n_simulations) {
results[[i]] <- simulate_and_estimate(true_params)
}
results <- Filter(Negate(is.null), results)
null_count <- sum(sapply(results, is.null))
cat("Number of unsuccessful fits (NULL results):", null_count, "\n")
## Number of unsuccessful fits (NULL results): 0
# Initialize coverage counters, one counter for each parameter
coverage_counts <- rep(0, length(true_params))
# Iterate through each simulation result to check coverage.
for (i in 1:length(results)) {
ci <- results[[i]]$conf_intervals
for (j in 1:length(true_params)) {
# Check if it falls within the confidence interval.
if (true_params[j] >= ci[j, 1] && true_params[j] <= ci[j, 2]) {
coverage_counts[j] <- coverage_counts[j] + 1
}
}
}
#calculate coverage
coverage_proportions <- coverage_counts / n_simulations
# print unsuccessful fits
cat("Coverage for A:", coverage_proportions[1], "\n")
## Coverage for A: 0.88
cat("Coverage for gamma:", coverage_proportions[2], "\n")
## Coverage for gamma: 0.87
cat("Coverage for omega:", coverage_proportions[3], "\n")
## Coverage for omega: 0.87
cat("Coverage for phi:", coverage_proportions[4], "\n")
## Coverage for phi: 0.87
cat("Coverage for y_shift:", coverage_proportions[5], "\n")
## Coverage for y_shift: 0.85
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::collapse() masks nlme::collapse()
## ✖ purrr::cross() masks pracma::cross()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ lubridate::hour() masks data.table::hour()
## ✖ lubridate::isoweek() masks data.table::isoweek()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ lubridate::mday() masks data.table::mday()
## ✖ lubridate::minute() masks data.table::minute()
## ✖ lubridate::month() masks data.table::month()
## ✖ lubridate::quarter() masks data.table::quarter()
## ✖ lubridate::second() masks data.table::second()
## ✖ purrr::transpose() masks data.table::transpose()
## ✖ lubridate::wday() masks data.table::wday()
## ✖ lubridate::week() masks data.table::week()
## ✖ lubridate::yday() masks data.table::yday()
## ✖ lubridate::year() masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(tidyr)
# Extract valid results (remove NULL values);
valid_results <- Filter(Negate(is.null), results)
# Extract parameter estimates into a data frame
estimate_data <- do.call(rbind, lapply(valid_results, function(res) {
as.data.frame(t(res$estimates))
}))
colnames(estimate_data) <- c("A", "gamma", "omega", "phi", "y_shift")
estimate_long <- estimate_data %>%
pivot_longer(cols = everything(), names_to = "Parameter", values_to = "Estimate")
#install.packages("gridExtra")
library(gridExtra) # arange multiple plots
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
# Create separate boxplots for each parameter with suitable y-axis ranges;
plot_A <- ggplot(estimate_long %>% filter(Parameter == "A"),
aes(x = Parameter, y = Estimate)) +
geom_boxplot(fill = "skyblue", color = "darkblue") +
geom_hline(yintercept = true_params["A"], linetype = "dashed") +
labs(title = "A", y = "Estimate") +
theme_minimal() +
ylim(995, 1005)
plot_gamma <- ggplot(estimate_long %>% filter(Parameter == "gamma"),
aes(x = Parameter, y = Estimate)) +
geom_boxplot(fill = "lightgreen", color = "darkgreen") +
geom_hline(yintercept = true_params["gamma"], linetype = "dashed") +
labs(title = "gamma", y = "Estimate") +
theme_minimal() +
ylim(0.0248, 0.02515)
plot_omega <- ggplot(estimate_long %>% filter(Parameter == "omega"),
aes(x = Parameter, y = Estimate)) +
geom_boxplot(fill = "lightblue", color = "blue") +
geom_hline(yintercept = true_params["omega"], linetype = "dashed") +
labs(title = "omega", y = "Estimate") +
theme_minimal() +
ylim(0.89975, 0.90025)
plot_phi <- ggplot(estimate_long %>% filter(Parameter == "phi"),
aes(x = Parameter, y = Estimate)) +
geom_boxplot(fill = "pink", color = "red") +
geom_hline(yintercept = true_params["phi"], linetype = "dashed") +
labs(title = "phi", y = "Estimate") +
theme_minimal() +
ylim(min(estimate_data$phi) * 0.9, max(estimate_data$phi) * 1.1)
plot_y_shift <- ggplot(estimate_long %>% filter(Parameter == "y_shift"),
aes(x = Parameter, y = Estimate)) +
geom_boxplot(fill = "yellow", color = "orange") +
geom_hline(yintercept = true_params["y_shift"], linetype = "dashed") +
labs(title = "y_shift", y = "Estimate") +
theme_minimal() +
ylim(min(estimate_data$y_shift) * 0.9, max(estimate_data$y_shift) * 1.1)
# Arrange all subplots into a single large plot.
grid.arrange(plot_A, plot_gamma, plot_omega, plot_phi, plot_y_shift, ncol = 3)
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 2 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
This plot indicates that most of the estimated parameters obtained from
100 simulations are close to the true parameters.True parameters are
indicated as blacked dashed line in above plot.
finding initial parameters on asymmetric osillator model(on initial meeting file)
time <- df_MC$Time
X1 <- df_MC$X1
# FInd X1 valleys and peaks
peaks <- findpeaks(X1, minpeakheight = 600, minpeakdistance = 20)
peaks<-peaks[-6, ]
t_peaks <- time[peaks[, 2]]
y_peaks<- peaks[, 1]
#delete outlier
valleys <- findpeaks(-X1, minpeakheight = -500, minpeakdistance = 50)
t_valleys <- time[valleys[, 2]]
y_valleys <- -valleys[, 1] # restore
#
df_peaks <- data.frame(Time = t_peaks, Value = y_peaks)
df_valleys <- data.frame(Time = t_valleys, Value = y_valleys)
ggplot(df_MC, aes(x = Time, y = X1)) +
geom_line(color = "blue") +
geom_point(data = df_peaks, aes(x = Time, y = Value), color = "red", size = 2, shape = 17) +
geom_point(data = df_valleys, aes(x = Time, y = Value), color = "green", size = 2, shape = 18) +
labs(title = "X1 Data with Peaks and Valleys",
x = "Time",
y = "X1(t)") +
theme_minimal()
log_y_peaks <- log(y_peaks)
log_y_valleys <- log(y_valleys)
fit_peaks <- lm(log_y_peaks ~ t_peaks)
fit_valleys <- lm(log_y_valleys ~ t_valleys)
eta_max <- coef(fit_peaks)[2] # η_max
eta_min <- coef(fit_valleys)[2] # η_min
# estimate A_max and A_min
A_max <- exp(coef(fit_peaks)[1])
A_min <- exp(coef(fit_valleys)[1])
Delta <- t_peaks[2] - t_peaks[1]
tau <- 2 * pi / Delta
#assume phi = 0
phi <- 0
# print
cat("Initial A_max:", A_max, "\n")
## Initial A_max: 2428.445
cat("Initial A_min:", A_min, "\n")
## Initial A_min: 372.6489
cat("Initial eta_max:", eta_max, "\n")-
cat("Initial eta_min:", eta_min, "\n")
## Initial eta_max: -0.002618547
## Initial eta_min: -0.001868607
## integer(0)
cat("Initial tau:", tau, "\n")
## Initial tau: 0.2564565
cat("Initial phi:", phi, "\n")
## Initial phi: 0
max_exp_curve <- A_max * exp(eta_max * time)
min_exp_curve <- A_min * exp(eta_min * time)
#
df_peaks <- data.frame(Time = t_peaks, Value = y_peaks)
df_valleys <- data.frame(Time = t_valleys, Value = y_valleys)
df_max_exp_curve <- data.frame(Time = time, ExponentialCurve = max_exp_curve)
df_min_exp_curve <- data.frame(Time = time, ExponentialCurve = min_exp_curve)
#
ggplot(df_MC, aes(x = Time, y = X1)) +
geom_line(color = "blue", size = 1) + #
geom_line(data = df_max_exp_curve, aes(x = Time, y = ExponentialCurve), color = "purple", linewidth = 1, linetype = "dashed") + # expontential curve
geom_line(data = df_min_exp_curve, aes(x = Time, y = ExponentialCurve), color = "yellow", linewidth = 1, linetype = "dashed") + # expontential curve
geom_point(data = df_peaks, aes(x = Time, y = Value), color = "red", linewidth = 2, shape = 17) + #
geom_point(data = df_valleys, aes(x = Time, y = Value), color = "green", linewidth = 2, shape = 18) +
labs(title = "X1 Data with Exponential Curve Fitting",
x = "Time",
y = "X1(t)") +
theme_minimal() +
scale_y_continuous(limits = c(0, max(X1) * 1.1)) #
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in geom_point(data = df_peaks, aes(x = Time, y = Value), color = "red",
## : Ignoring unknown parameters: `linewidth`
## Warning in geom_point(data = df_valleys, aes(x = Time, y = Value), color =
## "green", : Ignoring unknown parameters: `linewidth`
using lm(),I cannot get a good fit, so I try to use nls to find initial
A and gamma:
nls_fit <- nls(y_peaks ~ A_max * exp(eta_max * t_peaks),
start = list(A_max = max(y_peaks), eta_max = -0.01),
control = nls.control(maxiter = 100))
A_max <- coef(nls_fit)["A_max"]
eta_max <- coef(nls_fit)["eta_max"]
# generate curve
exp_curve <- A_max * exp(eta_max * time)
df_exp_curve <- data.frame(Time = time, ExponentialCurve = exp_curve)
ggplot(df_MC, aes(x = Time, y = X1)) +
geom_line(color = "blue", size = 1) +
geom_line(data = df_exp_curve, aes(x = Time, y = ExponentialCurve), color = "purple", size = 1, linetype = "dashed") +
geom_point(data = df_peaks, aes(x = Time, y = Value), color = "red", size = 2, shape = 17) +
labs(title = "X1 Data with Improved Exponential Curve Fitting",
x = "Time",
y = "X1(t)") +
theme_minimal() +
scale_y_continuous(limits = c(0, max(X1) * 1.1))
still not good fit,
try to fit in 2 segments time before 100 and time after 100
# split data for early(t<100) and late segments(t>=100)
time_early <- time[time < 100]
time_late <- time[time >= 100]
y_peaks_early <- y_peaks[t_peaks < 100]
t_peaks_early <- t_peaks[t_peaks < 100]
y_valleys_early <- y_valleys[t_valleys < 100]
t_valleys_early <- t_valleys[t_valleys < 100]
y_peaks_late <- y_peaks[t_peaks >= 100]
t_peaks_late <- t_peaks[t_peaks >= 100]
y_valleys_late <- y_valleys[t_valleys >= 100]
t_valleys_late <- t_valleys[t_valleys >= 100]
# exponential fit when t <100
log_y_peaks_early <- log(y_peaks_early)
log_y_valleys_early <- log(y_valleys_early)
fit_peaks_early <- lm(log_y_peaks_early ~ t_peaks_early)
fit_valleys_early <- lm(log_y_valleys_early ~ t_valleys_early)
A_max_early <- exp(coef(fit_peaks_early)[1])
eta_max_early <- -coef(fit_peaks_early)[2]
A_min_early <- exp(coef(fit_valleys_early)[1])
eta_min_early <- -coef(fit_valleys_early)[2]
# exponential fit when t > 100
log_y_peaks_late <- log(y_peaks_late)
log_y_valleys_late <- log(y_valleys_late)
fit_peaks_late <- lm(log_y_peaks_late ~ t_peaks_late)
fit_valleys_late <- lm(log_y_valleys_late ~ t_valleys_late)
A_max_late <- exp(coef(fit_peaks_late)[1])
eta_max_late <- -coef(fit_peaks_late)[2]
A_min_late <- exp(coef(fit_valleys_late)[1])
eta_min_late <- -coef(fit_valleys_late)[2]
#plot curve to test
# Generate exponential curves for early and late segments
max_exp_curve_early <- A_max_early * exp(-eta_max_early * time_early)
min_exp_curve_early <- A_min_early * exp(-eta_min_early * time_early)
max_exp_curve_late <- A_max_late * exp(-eta_max_late * time_late)
min_exp_curve_late <- A_min_late * exp(-eta_min_late * time_late)
# Convert data to data frames for ggplot
df_exp_curve_early <- data.frame(Time = time_early, MaxExpCurve = max_exp_curve_early, MinExpCurve = min_exp_curve_early)
df_exp_curve_late <- data.frame(Time = time_late, MaxExpCurve = max_exp_curve_late, MinExpCurve = min_exp_curve_late)
# Peaks and valleys of the original data
df_peaks <- data.frame(Time = t_peaks, Value = y_peaks)
df_valleys <- data.frame(Time = t_valleys, Value = y_valleys)
# Plot the original data and the fitted exponential curves
ggplot(df_MC, aes(x = Time, y = X1)) +
geom_line(color = "blue", size = 1) + # original data
geom_line(data = df_exp_curve_early, aes(x = Time, y = MaxExpCurve), color = "purple", size = 1, linetype = "dashed") + # Early segment max exponential curve
geom_line(data = df_exp_curve_early, aes(x = Time, y = MinExpCurve), color = "yellow", size = 1, linetype = "dashed") + # Early segment min exponential curve
geom_line(data = df_exp_curve_late, aes(x = Time, y = MaxExpCurve), color = "purple", size = 1, linetype = "dotted") + # late segment min exponential curve
geom_line(data = df_exp_curve_late, aes(x = Time, y = MinExpCurve), color = "yellow", size = 1, linetype = "dotted") + # late segment min exponential curve
geom_point(data = df_peaks, aes(x = Time, y = Value), color = "red", size = 2, shape = 17) + # Red points marking peaks
geom_point(data = df_valleys, aes(x = Time, y = Value), color = "green", size = 2, shape = 18) + # Green points marking valleys
labs(title = "X1 Data with Early and Late Exponential Curve Fitting",
x = "Time",
y = "X1(t)") +
theme_minimal() +
scale_y_continuous(limits = c(0, max(X1) * 1.1)) # Adjust y-axis range to better display data
now calculate omega for time <100 and time>=100
# For time < 100
# Calculate Delta and omega for early segment (time < 100)
Delta_early <- t_peaks_early[2] - t_peaks_early[1] # Assuming the first two peaks are used
omega_early <- 2 * pi / Delta_early
# For time >= 100
# Calculate Delta and omega for late segment (time >= 100)
Delta_late <- t_peaks_late[2] - t_peaks_late[1] # Assuming the first two peaks in the late segment are used
omega_late <- 2 * pi / Delta_late
Create a chart to show parameters
#install.packages("knitr") # Uncomment this line if knitr is not installed
#install.packages("DT") # Uncomment this line if DT is not installed
library(knitr)
library(DT)
# Create a data frame
result_table <- data.frame(
Parameter = c("A_max", "eta_max", "A_min", "eta_min", "Delta", "omega"),
`time < 100` = c(A_max_early, eta_max_early, A_min_early, eta_min_early, Delta_early, omega_early),
`time >= 100` = c(A_max_late, eta_max_late, A_min_late, eta_min_late, Delta_late, omega_late)
)
# Display the table
# Method 1: Use knitr::kable to show a static table
kable(result_table, caption = "Comparison Table for Time < 100 and Time >= 100")
| Parameter | time…100 | time….100 |
|---|---|---|
| A_max | 2903.0532824 | 2159.5386566 |
| eta_max | 0.0049023 | 0.0022853 |
| A_min | 414.3190899 | 334.5334712 |
| eta_min | 0.0026202 | 0.0015619 |
| Delta | 24.5000000 | 25.6000000 |
| omega | 0.2564565 | 0.2454369 |
datatable(result_table, caption = "Comparison Table for Time < 100 and Time >= 100")
# when t = 1:
A_max_1 <- A_max_early * exp(-eta_max_early * 1)
A_min_1 <- A_min_early * exp(-eta_min_early * 1)
# the value of f(1)
f1 <- df_MC$X1[df_MC$Time == 1]
# phi
cos_omega_phi <- (f1 - 0.5 * (A_max_1 + A_min_1)) / (0.5 * (A_max_1 - A_min_1))
phi_initial <- acos(cos_omega_phi) - omega_early
#
print(phi_initial)
## (Intercept)
## 2.427676
# when t = 102
A_max_102 <- A_max_late * exp(-eta_max_late * 102)
A_min_102 <- A_min_late * exp(-eta_min_late * 102)
# the value of f(1)
f102 <- df_MC$X1[df_MC$Time == 102]
# phi
cos_omega_phi102 <- (f1 - 0.5 * (A_max_102 + A_min_102)) / (0.5 * (A_max_102 - A_min_102))
phi_102 <- acos(cos_omega_phi102) - omega_late
#
print(phi_102)
## (Intercept)
## 2.022232
#initial meeting model function
asym_model_function <- function(t, A_max, eta_max, A_min, eta_min, omega, phi) {
A_max_t <- A_max * exp(-eta_max * t)
A_min_t <- A_min * exp(-eta_min * t)
result <- 0.5 * (A_max_t + A_min_t) + 0.5 * (A_max_t - A_min_t) * cos(omega * t + phi)
return(result)
}
true_params_asym = c(A_max =2000 , eta_max = 0.005, A_min =200, eta_min = 0.0025,omega = 0.7, phi = 0)
simulate_asymm_func<- function(t, params, sigma) {
predicted <- asym_model_function(t, params[1], params[2], params[3], params[4], params[5],params[6])
noise <- rnorm(length(t), mean = 0, sd = sigma) #using sigma0 to produce noise
simulated_data <- predicted + noise
return(simulated_data)
}
sigma0 = 10
t <- seq(0, 100, by = 0.1)
simulated_asymm_data <- simulate_asymm_func(t, true_params_asym, sigma0 )
plot(t,simulated_asymm_data,type = "l", col = "blue")
peaks <- findpeaks(simulated_asymm_data, minpeakheight = 600, minpeakdistance = 20)
t_peaks <- time[peaks[, 2]]
y_peaks<- peaks[, 1]
#delete outlier
valleys <- findpeaks(-simulated_asymm_data, minpeakheight = -500, minpeakdistance = 50)
t_valleys <- time[valleys[, 2]]
y_valleys <- -valleys[, 1] # restore
df_simulated <- data.frame(Time = t, X1 = simulated_asymm_data)
# Create data frames for peaks and valleys
df_peaks <- data.frame(Time = t_peaks, Value = y_peaks)
df_valleys <- data.frame(Time = t_valleys, Value = y_valleys)
# Plotting using ggplot2
ggplot(df_simulated, aes(x = Time, y = X1)) +
geom_line(color = "blue") + # Main line for simulated data
geom_point(data = df_peaks, aes(x = Time, y = Value), color = "red", size = 2, shape = 17) + # Peaks in red
geom_point(data = df_valleys, aes(x = Time, y = Value), color = "green", size = 2, shape = 18) + # Valleys in green
labs(
title = "X1 Data with Peaks and Valleys",
x = "Time",
y = "X1(t)"
) +
theme_minimal()
# exponential fit when t <100
log_y_peaks <- log(y_peaks)
log_y_valleys <- log(y_valleys)
fit_peaks <- lm(log_y_peaks ~ t_peaks)
fit_valleys <- lm(log_y_valleys ~ t_valleys)
A_max_asym_sim <- exp(coef(fit_peaks)[1])
eta_max_asym_sim <- -coef(fit_peaks)[2]
A_min_asym_sim <- exp(coef(fit_valleys)[1])
eta_min_asym_sim <- -coef(fit_valleys)[2]
Delta_asym_sim <- t_peaks[2] - t_peaks[1] # Assuming the first two peaks are used
omega_asym_sim <- 2 * pi / Delta_asym_sim
initial_params_asymm_sim =c(A_max_asym_sim,eta_max_asym_sim,A_min_asym_sim,eta_min_asym_sim, omega_asym_sim, phi = 0)
print(initial_params_asymm_sim)
## (Intercept) t_peaks (Intercept) t_valleys phi
## 2.055468e+03 6.041656e-03 1.908841e+02 2.462641e-03 7.391983e-01 0.000000e+00
cat("true_params_asym: ", true_params_asym)
## true_params_asym: 2000 0.005 200 0.0025 0.7 0
# Define initial parameter values calculated from the peaks and valleys
initial_params_asymm_sim <- list(
A_max = A_max_asym_sim,
eta_max = eta_max_asym_sim,
A_min = A_min_asym_sim,
eta_min = eta_min_asym_sim,
omega = omega_asym_sim,
phi = 0
)
fit_nls <- nlsLM(
simulated_asymm_data ~ asym_model_function(t, A_max, eta_max, A_min, eta_min, omega, phi),
start = initial_params_asymm_sim,
data = data.frame(t = t, simulated_asymm_data = simulated_asymm_data)
)
# Display the summary of the fitted model
summary(fit_nls)
##
## Formula: simulated_asymm_data ~ asym_model_function(t, A_max, eta_max,
## A_min, eta_min, omega, phi)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## A_max.(Intercept) 2.000e+03 1.246e+00 1605.533 <2e-16 ***
## eta_max.t_peaks 4.992e-03 1.215e-05 410.980 <2e-16 ***
## A_min.(Intercept) 2.000e+02 1.198e+00 166.943 <2e-16 ***
## eta_min.t_valleys 2.497e-03 1.119e-04 22.314 <2e-16 ***
## omega 7.000e-01 2.329e-05 30048.486 <2e-16 ***
## phi 1.281e-03 1.159e-03 1.105 0.27
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.22 on 995 degrees of freedom
##
## Number of iterations to convergence: 14
## Achieved convergence tolerance: 1.49e-08
:I found that the model is not fitting(as initial parameters = estimated parameters)
true_params_asym = c(A_max =2000 , eta_max = 0.005, A_min =200, eta_min = 0.0025,omega = 0.7, phi = 0)
coverage_results <- list(
A_max = 0, eta_max = 0, A_min = 0, eta_min = 0, omega = 0, phi = 0
)
num_simulations <- 2
confidence_level <- 0.99
z_score <- qnorm((1 + confidence_level) / 2)
sigma0=1
for (i in 1:num_simulations) {
simulated_data <- simulate_asymm_func(t, true_params_asym, sigma0)
peaks <- findpeaks(simulated_data, minpeakheight = 600, minpeakdistance = 20)
t_peaks <- t[peaks[, 2]]
y_peaks <- peaks[, 1]
valleys <- findpeaks(-simulated_data, minpeakheight = -500, minpeakdistance = 50)
t_valleys <- t[valleys[, 2]]
y_valleys <- -valleys[, 1]
fit_peaks <- lm(log(y_peaks) ~ t_peaks)
fit_valleys <- lm(log(y_valleys) ~ t_valleys)
A_max_sim <- exp(coef(fit_peaks)[1])
eta_max_sim <- -coef(fit_peaks)[2]
A_min_sim <- exp(coef(fit_valleys)[1])
eta_min_sim <- -coef(fit_valleys)[2]
Delta_sim <- mean(diff(t_peaks[1:2]))
omega_sim <- 2 * pi / Delta_sim
initial_params <- list(
A_max = A_max_sim, eta_max = eta_max_sim,
A_min = A_min_sim, eta_min = eta_min_sim,
omega = omega_sim, phi = 0
)
print(paste("Iteration:", i, "Initial Params:", initial_params))
fit_nls <- nls2(
simulated_data ~ asym_model_function(t, A_max, eta_max, A_min, eta_min, omega, phi),
start = initial_params,
algorithm = "brute-force",
data = data.frame(t = t, simulated_data = simulated_data)
)
estimated_params <- coef(fit_nls)
print(paste("Iteration:", i, "Estimated Params:", estimated_params))
param_se <- summary(fit_nls)$coefficients[, "Std. Error"]
conf_intervals <- confint2(fit_nls, level = 0.99, method = 'asymptotic')
for (param in names(coverage_results)) {
true_value <- true_params_asym[[param]]
lower_bound <- conf_intervals[param, 1]
upper_bound <- conf_intervals[param, 2]
if (true_value >= lower_bound && true_value <= upper_bound) {
coverage_results[[param]] <- coverage_results[[param]] + 1
}
}
}
## [1] "Iteration: 1 Initial Params: c(`(Intercept)` = 2001.22842936914)"
## [2] "Iteration: 1 Initial Params: c(t_peaks = 0.00500879240684368)"
## [3] "Iteration: 1 Initial Params: c(`(Intercept)` = 199.922583506611)"
## [4] "Iteration: 1 Initial Params: c(t_valleys = 0.0025395218015428)"
## [5] "Iteration: 1 Initial Params: 0.705975877211189"
## [6] "Iteration: 1 Initial Params: 0"
## [1] "Iteration: 1 Estimated Params: 2001.22842936914"
## [2] "Iteration: 1 Estimated Params: 0.00500879240684368"
## [3] "Iteration: 1 Estimated Params: 199.922583506611"
## [4] "Iteration: 1 Estimated Params: 0.0025395218015428"
## [5] "Iteration: 1 Estimated Params: 0.705975877211189"
## [6] "Iteration: 1 Estimated Params: 0"
## [1] "Iteration: 2 Initial Params: c(`(Intercept)` = 2000.92363964796)"
## [2] "Iteration: 2 Initial Params: c(t_peaks = 0.00500051344975148)"
## [3] "Iteration: 2 Initial Params: c(`(Intercept)` = 199.742776469103)"
## [4] "Iteration: 2 Initial Params: c(t_valleys = 0.002501172102493)"
## [5] "Iteration: 2 Initial Params: 0.705975877211189"
## [6] "Iteration: 2 Initial Params: 0"
## [1] "Iteration: 2 Estimated Params: 2000.92363964796"
## [2] "Iteration: 2 Estimated Params: 0.00500051344975148"
## [3] "Iteration: 2 Estimated Params: 199.742776469103"
## [4] "Iteration: 2 Estimated Params: 0.002501172102493"
## [5] "Iteration: 2 Estimated Params: 0.705975877211189"
## [6] "Iteration: 2 Estimated Params: 0"
coverage_results <- sapply(coverage_results, function(x) x / num_simulations)
print(coverage_results)
## A_max eta_max A_min eta_min omega phi
## 1 1 1 1 0 1
# list to store results
coverage_results <- list(
A_max = 0, eta_max = 0, A_min = 0, eta_min = 0, omega = 0, phi = 0
)
print(true_params_asym)
## A_max eta_max A_min eta_min omega phi
## 2.0e+03 5.0e-03 2.0e+02 2.5e-03 7.0e-01 0.0e+00
num_simulations <- 100
sigma0 = 10
for (i in 1:num_simulations) {
simulated_data <- simulate_asymm_func(t, true_params_asym, sigma0)
peaks <- findpeaks(simulated_data, minpeakheight = 600, minpeakdistance = 20)
t_peaks <- t[peaks[, 2]]
y_peaks <- peaks[, 1]
valleys <- findpeaks(-simulated_data, minpeakheight = -500, minpeakdistance = 50)
t_valleys <- t[valleys[, 2]]
y_valleys <- -valleys[, 1]
fit_peaks <- lm(log(y_peaks) ~ t_peaks)
fit_valleys <- lm(log(y_valleys) ~ t_valleys)
A_max_sim <- exp(coef(fit_peaks)[1])
eta_max_sim <- -coef(fit_peaks)[2]
A_min_sim <- exp(coef(fit_valleys)[1])
eta_min_sim <- -coef(fit_valleys)[2]
Delta_sim <- t_peaks[2] - t_peaks[1]
omega_sim <- 2 * pi / Delta_sim
initial_params <- list(
A_max = A_max_sim, eta_max = eta_max_sim,
A_min = A_min_sim, eta_min = eta_min_sim,
omega = omega_sim, phi = 0
)
library(minpack.lm)
fit_nls <- tryCatch(
{
nlsLM(
simulated_data ~ asym_model_function(t, A_max, eta_max, A_min, eta_min, omega, phi),
start = initial_params,
data = data.frame(t = t, simulated_data = simulated_data)
)
},
error = function(e) {
message("Fitting failed for iteration ", i)
return(NULL)
}
)
if (is.null(fit_nls)) next
estimated_params <- coef(fit_nls)
# print(initial_params)
# print(estimated_params)
conf_intervals <- confint2(fit_nls, level = 0.99, method = 'asymptotic')
rownames(conf_intervals) <- c("A_max", "eta_max", "A_min", "eta_min", "omega", "phi")
# coverage
for (param in names(coverage_results)) {
true_value <- true_params_asym[[param]]
lower_bound <- conf_intervals[param, 1]
upper_bound <- conf_intervals[param, 2]
if (true_value >= lower_bound && true_value <= upper_bound) {
coverage_results[[param]] <- coverage_results[[param]] + 1
}
}
}
coverage_results <- sapply(coverage_results, function(x) x / num_simulations)
print(coverage_results)
## A_max eta_max A_min eta_min omega phi
## 1.00 1.00 1.00 1.00 0.97 1.00
In this analysis, we fit an asymmetrical oscillatory model to the McManus data(X1), using \(t = 100\) as the boundary to split the data into two parts: \(t < 100\) and \(t \geq 100\). The model is given by:
\[ y_{i,j} \sim N(f_j(t_i), \sigma_i^2), \quad i = 1, \dots, n, \text{ independently} \]
where the function \(f_j(t)\) is defined as:
\[ f_j(t) = 0.5 (A_{\text{max}}(t) + A_{\text{min}}(t)) + 0.5 (A_{\text{max}}(t) - A_{\text{min}}(t)) \cos(\tau t + \phi_j) \]
Here, \(A_{\text{max}}(t) = A_{\text{max}} \cdot \exp(-\eta_{\text{max}} \cdot t)\) and \(A_{\text{min}}(t) = A_{\text{min}} \cdot \exp(-\eta_{\text{min}} \cdot t)\) represent the decaying maximum and minimum amplitudes over time, with parameters \(A_{\text{max}}, \eta_{\text{max}}, A_{\text{min}}, \eta_{\text{min}}, \tau\) (frequency), and \(\phi_j\) (phase shift) as the model parameters to be estimated.
We use the function in R to fit this non-linear model separately for the data segments \(t < 100\) and \(t \geq 100\), using appropriate initial parameter values for each segment based on prior analysis.
Each model fit provides the estimated parameters \(A_{\text{max}}, \eta_{\text{max}}, A_{\text{min}}, \eta_{\text{min}}, \omega, \phi_j\) and their associated 99% confidence intervals.
# Split the data into two parts: time < 100 and time >= 100
df_MC_early <- subset(df_MC, Time < 100)
df_MC_late <- subset(df_MC, Time >= 100)
# Define initial parameters for the time < 100 case
initial_params_early <- list(
A_max = A_max_early,
eta_max = eta_max_early,
A_min = A_min_early,
eta_min = eta_min_early,
omega = omega_early,
phi_j = phi_initial
)
# Fit the model for time < 100
fit_nls_early <- nls(
X1 ~ asym_model_function(Time, A_max, eta_max, A_min, eta_min, omega, phi_j),
start = initial_params_early,
data = df_MC_early
)
conf_intervals_early <- confint2(fit_nls_early, level = 0.99, method = 'asymptotic')
cat("Fit results for time < 100:\n")
## Fit results for time < 100:
coef(fit_nls_early)
## A_max eta_max A_min eta_min omega
## 2.780759e+03 5.152686e-03 1.580878e+02 -3.823149e-03 2.600176e-01
## phi_j
## 3.672187e+00
# Fit the model for time < 100 using nlsLM
fit_nlsLM_early <- tryCatch({
nlsLM(
X1 ~ asym_model_function(Time, A_max, eta_max, A_min, eta_min, omega, phi_j),
start = initial_params_early,
data = df_MC_early
)
}, error = function(e) {
message("Fitting failed for the early time period")
return(NULL)
})
estimated_early_params <- coef(fit_nlsLM_early)
names(estimated_early_params)<-c("A_max", "eta_max", "A_min", "eta_min", "omega", "phi")
# Check if fitting was successful before calculating confidence intervals
if (!is.null(fit_nlsLM_early)) {
conf_intervals_early <- confint2(fit_nlsLM_early, level = 0.99, method = 'asymptotic')
cat("Fit results for time < 100:\n")
print(estimated_early_params)
print(conf_intervals_early)
} else {
cat("Fitting was unsuccessful for the early time period.\n")
}
## Fit results for time < 100:
## A_max eta_max A_min eta_min omega
## 2.780757e+03 5.152672e-03 1.580923e+02 -3.822675e-03 2.600176e-01
## phi
## 3.672186e+00
## 0.5 % 99.5 %
## A_max.(Intercept) 2.744675e+03 2.816839e+03
## eta_max.t_peaks_early 4.880479e-03 5.424865e-03
## A_min.(Intercept) 1.280567e+02 1.881280e+02
## eta_min.t_valleys_early -6.638467e-03 -1.006882e-03
## omega 2.595499e-01 2.604852e-01
## phi_j.(Intercept) 3.649612e+00 3.694760e+00
# Define initial parameters for the time >= 100 case
initial_params_late <- list(
A_max = A_max_late,
eta_max = eta_max_late,
A_min = A_min_late,
eta_min = eta_min_late,
omega = omega_late,
phi_j = phi_102 # Initial guess for phi_j
)
# Fit the model for time >= 100
fit_nls_late <- nls(
X1 ~ asym_model_function(Time, A_max, eta_max, A_min, eta_min, omega, phi_j),
start = initial_params_late,
data = df_MC_late
)
cat("Fit results for time >= 100:\n")
## Fit results for time >= 100:
summary(fit_nls_late)
##
## Formula: X1 ~ asym_model_function(Time, A_max, eta_max, A_min, eta_min,
## omega, phi_j)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## A_max 2.143e+03 8.167e+00 262.393 < 2e-16 ***
## eta_max 2.461e-03 1.472e-05 167.216 < 2e-16 ***
## A_min 1.769e+02 5.822e+00 30.384 < 2e-16 ***
## eta_min 5.527e-04 1.069e-04 5.172 2.43e-07 ***
## omega 2.582e-01 2.971e-05 8692.729 < 2e-16 ***
## phi_j -2.299e+00 7.514e-03 -306.002 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 65.21 on 3993 degrees of freedom
##
## Number of iterations to convergence: 12
## Achieved convergence tolerance: 4.672e-06
conf_intervals_late <- confint2(fit_nls_late, level = 0.99, method = 'asymptotic')
# Fit the model for time < 100 using nlsLM
fit_nlsLM_late <- tryCatch({
nlsLM(
X1 ~ asym_model_function(Time, A_max, eta_max, A_min, eta_min, omega, phi_j),
start = initial_params_late,
data = df_MC_late
)
}, error = function(e) {
message("Fitting failed for the late time period")
return(NULL)
})
estimated_late_params <-coef(fit_nlsLM_late)
# Check if fitting was successful before calculating confidence intervals
if (!is.null(fit_nlsLM_late)) {
conf_intervals_late <- confint2(fit_nlsLM_late, level = 0.99, method = 'asymptotic')
cat("Fit results for time < 100:\n")
print(coef(fit_nlsLM_late))
print(conf_intervals_late)
} else {
cat("Fitting was unsuccessful for the late time period.\n")
}
## Fit results for time < 100:
## A_max.(Intercept) eta_max.t_peaks_late A_min.(Intercept)
## 2.142972e+03 2.460661e-03 1.769057e+02
## eta_min.t_valleys_late omega phi_j.(Intercept)
## 5.527330e-04 2.582176e-01 -2.299163e+00
## 0.5 % 99.5 %
## A_max.(Intercept) 2.121925e+03 2.164019e+03
## eta_max.t_peaks_late 2.422738e-03 2.498583e-03
## A_min.(Intercept) 1.619013e+02 1.919101e+02
## eta_min.t_valleys_late 2.773166e-04 8.281494e-04
## omega 2.581411e-01 2.582942e-01
## phi_j.(Intercept) -2.318526e+00 -2.279800e+00
estimated_late_params
## A_max.(Intercept) eta_max.t_peaks_late A_min.(Intercept)
## 2.142972e+03 2.460661e-03 1.769057e+02
## eta_min.t_valleys_late omega phi_j.(Intercept)
## 5.527330e-04 2.582176e-01 -2.299163e+00
names(estimated_late_params) <- c("A_max", "eta_max", "A_min", "eta_min", "omega", "phi")
estimated_late_params
## A_max eta_max A_min eta_min omega
## 2.142972e+03 2.460661e-03 1.769057e+02 5.527330e-04 2.582176e-01
## phi
## -2.299163e+00
The code below extracts the estimated parameters (\(A_{\text{max}}\), \(\eta_{\text{max}}\), \(A_{\text{min}}\), \(\eta_{\text{min}}\), \(\omega\), \(\phi_j\)) and their corresponding 99% confidence intervals for both (time \(<\) 100) and (time \(\geq\) 100) periods. It combines these values into a single data frame () that lists each parameter with its estimate, lower bound (0.5%), and upper bound (99.5%) for both time periods. The purpose is to organize the parameter estimates and confidence intervals in a structured format, enabling easy comparison between the two time periods.
estimates_early <- estimated_early_params
lower_early <- conf_intervals_early[, 1]
upper_early <- conf_intervals_early[, 2]
estimates_late <- estimated_late_params
lower_late <- conf_intervals_late[, 1]
upper_late <- conf_intervals_late[, 2]
combined_data <- data.frame(
Parameter = rep(names(estimates_early), 2),
Time_Period = rep(c("time < 100", "time >= 100"), each = length(estimates_early)),
Estimate = c(estimates_early, estimates_late),
Lower_0.5 = c(lower_early, lower_late),
Upper_99.5 = c(upper_early, upper_late)
)
kable(combined_data, caption = "Estimated Parameters with 99% Confidence Intervals for Different Time Periods")
| Parameter | Time_Period | Estimate | Lower_0.5 | Upper_99.5 |
|---|---|---|---|---|
| A_max | time < 100 | 2780.7568895 | 2744.6748190 | 2816.8389599 |
| eta_max | time < 100 | 0.0051527 | 0.0048805 | 0.0054249 |
| A_min | time < 100 | 158.0923415 | 128.0567310 | 188.1279519 |
| eta_min | time < 100 | -0.0038227 | -0.0066385 | -0.0010069 |
| omega | time < 100 | 0.2600176 | 0.2595499 | 0.2604852 |
| phi | time < 100 | 3.6721860 | 3.6496118 | 3.6947601 |
| A_max | time >= 100 | 2142.9718996 | 2121.9249532 | 2164.0188460 |
| eta_max | time >= 100 | 0.0024607 | 0.0024227 | 0.0024986 |
| A_min | time >= 100 | 176.9056728 | 161.9012762 | 191.9100693 |
| eta_min | time >= 100 | 0.0005527 | 0.0002773 | 0.0008281 |
| omega | time >= 100 | 0.2582176 | 0.2581411 | 0.2582942 |
| phi | time >= 100 | -2.2991626 | -2.3185255 | -2.2797998 |
I uses the estimated parameters for the \(<\) 100 and \(\geq\) 100 periods to generate predicted values using the . The original data points (in black) for both periods are overlaid with the model’s predicted curves (in blue for and red for ). The objective is to visually assess how well the model’s predictions (based on estimated parameters) match the original data for both periods.
df_MC_early$predicted <- asym_model_function(df_MC_early$Time, estimated_early_params["A_max"], estimated_early_params["eta_max"],
estimated_early_params["A_min"], estimated_early_params["eta_min"],
estimated_early_params["omega"], estimated_early_params["phi"])
df_MC_late$predicted <- asym_model_function(df_MC_late$Time,
estimated_late_params["A_max"], estimated_late_params["eta_max"], estimated_late_params["A_min"], estimated_late_params["eta_min"],
estimated_late_params["omega"], estimated_late_params["phi"])
ggplot() +
geom_point(data = df_MC_early, aes(x = Time, y = X1), color = "black", size = 0.3) +
geom_line(data = df_MC_early, aes(x = Time, y = predicted), color = "blue", size = 0.5) +
geom_point(data = df_MC_late, aes(x = Time, y = X1), color = "black", size = 0.3) +
geom_line(data = df_MC_late, aes(x = Time, y = predicted), color = "red", size = 0.5) +
labs(title = "Comparison of Original Data and Model Fit",
x = "Time",
y = "X1") +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
Nonlinear Least Squares Model Fitting with Weights ## time specific function
# Define the model function
model_function <- function(t, A, gamma, omega, phi, y_shift) {
A * exp(-gamma * t ) * cos(omega * t + phi) + y_shift
}
# Function to calculate variance at each time point
calculate_variance <- function(replicates) {
n <- nrow(replicates)
mean_vals <- rowMeans(replicates) # Mean for each time point
variance <- rowSums((replicates - mean_vals)^2) / (n - 1) # Variance formula
return(variance)
}
# Weighted nonlinear least squares estimation
weighted_nls <- function(t, replicates, initial_params) {
variances <- calculate_variance(replicates) # Variance at each time point
# Fit the model using nlsLM
fit <- nlsLM(y ~ model_function(t, A, gamma, omega, phi, y_shift),
start = initial_params,
data = list(t = t, y = rowMeans(replicates)),
weights = 1 / variances) # Weight by the inverse of variances
return(fit)
}
# Use data from replicates for fitting
replicates <- df_MC[, c("X1", "X2", "X3", "X4")]
# Calculate variance
observed_variances <- calculate_variance(replicates)
hist(observed_variances, main = "Histogram of Variances",
xlab = "Variance",
ylab = "Frequency",
col = "lightblue",
border = "black")
Suppose we have a sample \(X_1, X_2, \dots, X_n\) drawn from a normal distribution \(\mathcal{N}(\mu, \sigma^2)\), with an observed sample variance \(S_i^2\). Then, the distribution of the observed sample variance is: \[ \frac{(n-1) S_i^2}{\sigma_i^2} \sim \chi^2_{n-1} \] where \(\chi^2_{n-1}\) denotes a chi-squared distribution with \(n-1\) degrees of freedom.
Therefore, given \(\sigma_i^2\), the probability density function (likelihood function) of the sample variance \(S_i^2\) can be written as: \[ P(S_i^2 | \sigma_i^2) \propto \left( \frac{1}{\sigma_i^2} \right)^{(n-1)/2} \exp \left( -\frac{(n-1) S_i^2}{2 \sigma_i^2} \right) \]
For simplicity, we assume the inverse of the variance \(\sigma_i^{-2}\) follows a Gamma distribution: \[ \sigma_i^{-2} \sim \text{Gamma}(\alpha_{\text{prior}}, \beta_{\text{prior}}) \] The probability density function of the Gamma distribution is: \[ P(\sigma_i^{-2}) \propto (\sigma_i^{-2})^{\alpha_{\text{prior}} - 1} \exp(-\beta_{\text{prior}} \cdot \sigma_i^{-2}) \]
According to Bayes’ theorem, the posterior distribution \(P(\sigma_i^{-2} | S_i^2)\) is proportional to the product of the prior distribution and the likelihood function: \[ P(\sigma_i^{-2} | S_i^2) \propto P(S_i^2 | \sigma_i^2) \cdot P(\sigma_i^{-2}) \] Substituting the likelihood function and prior distribution, we have: \[ P(\sigma_i^{-2} | S_i^2) \propto (\sigma_i^{-2})^{(n-1)/2} \exp \left( -\frac{(n-1) S_i^2}{2} \sigma_i^{-2} \right) \cdot (\sigma_i^{-2})^{\alpha_{\text{prior}} - 1} \exp(-\beta_{\text{prior}} \cdot \sigma_i^{-2}) \] Combining the exponents, we get: \[ P(\sigma_i^{-2} | S_i^2) \propto (\sigma_i^{-2})^{\alpha_{\text{prior}} + (n-1)/2 - 1} \exp \left( -\left( \beta_{\text{prior}} + \frac{(n-1) S_i^2}{2} \right) \sigma_i^{-2} \right) \]
We observe that this expression still has the form of a Gamma distribution. Therefore, the posterior distribution \(P(\sigma_i^{-2} | S_i^2)\) is a Gamma distribution, with posterior parameters:The mean of a Gamma distribution is given by \(\frac{\text{shape}}{\text{scale}}\), so the mean of the posterior distribution (used as an estimate of the inverse variance) is: \[ \text{Posterior Mean} = \frac{\alpha_{\text{post}}}{\beta_{\text{post}}} \]
# # Define the Bayesian inference function
bayesian_variance_estimation <- function(observed_variances, n, alpha_prior = 1, beta_prior = 1) {
# Input parameters:
# - observed_variances: a vector of observed variances
# - n: the number of samples at each time point
# - alpha_prior, beta_prior: parameters for the Gamma prior distribution
# Create a vector to store the estimated weight
weights <- numeric(length(observed_variances))
# # Iterate over each observed variance and perform Bayesian inference
for (i in 1:length(observed_variances)) {
S2_i <- observed_variances[i]
alpha_post <- alpha_prior + (n - 1) / 2 # Update the posterior parameters
beta_post <- beta_prior + S2_i * (n - 1) / 2
posterior_mean <- alpha_post / beta_post # Calculate the mean of the posterior distribution
# (mean of a Gamma distribution is shape/scale)
weights[i] <- posterior_mean # Use the posterior mean as the reliable weight
}
return(weights)
}
# # Set prior parameters (can be adjusted as needed)
alpha_prior <- 2.5 # Gamma: shape
beta_prior <- 7 # Gamma: scale
# Number of samples
n <- ncol(replicates)
# Calculate reliable weights using Bayesian inference
weights <- bayesian_variance_estimation(observed_variances, n, alpha_prior, beta_prior)
# Plot a histogram of the adjusted Bayesian weights
hist(weights, main = "Histogram of Bayesian Adjusted Weights",
xlab = "Adjusted Weight", ylab = "Frequency", col = "lightblue", border = "black")
Assume \(\sigma_i^{-2} \sim \text{Gamma}(\alpha, \beta)\), where \(\alpha\) is the shape parameter and \(\beta\) is the scale parameter. The probability density function of the Gamma distribution is given by: \[ f(x; \alpha, \beta) = \frac{\beta^\alpha}{\Gamma(\alpha)} x^{\alpha - 1} e^{-\beta x} \] where \(x = \sigma_i^{-2}\), and \(x > 0\).
Assume we have \(n\) observed values \(x_1, x_2, \ldots, x_n\) (where \(x_i = \sigma_i^{-2}\)). The likelihood function \(L(\alpha, \beta)\) for these observations is: \[ L(\alpha, \beta) = \prod_{i=1}^n f(x_i; \alpha, \beta) = \prod_{i=1}^n \frac{\beta^\alpha}{\Gamma(\alpha)} x_i^{\alpha - 1} e^{-\beta x_i} \]
For simplicity, we take the log of the likelihood function to obtain the log-likelihood \(\ell(\alpha, \beta)\): \[ \ell(\alpha, \beta) = \ln L(\alpha, \beta) = \sum_{i=1}^n \ln \left( \frac{\beta^\alpha}{\Gamma(\alpha)} x_i^{\alpha - 1} e^{-\beta x_i} \right) \] Expanding this expression: \[ \ell(\alpha, \beta) = \sum_{i=1}^n (\alpha \ln \beta - \ln \Gamma(\alpha) + (\alpha - 1) \ln x_i - \beta x_i) \] Simplifying further, we get: \[ \ell(\alpha, \beta) = n \alpha \ln \beta - n \ln \Gamma(\alpha) + (\alpha - 1) \sum_{i=1}^n \ln x_i - \beta \sum_{i=1}^n x_i \]
To find the maximum likelihood estimate, we need to take the derivatives with respect to \(\alpha\) and \(\beta\) and set them equal to zero.
\[ \frac{\partial \ell}{\partial \beta} = \frac{n \alpha}{\beta} - \sum_{i=1}^n x_i \] Setting this to zero: \[ \frac{n \alpha}{\beta} = \sum_{i=1}^n x_i \] Solving for \(\beta\): \[ \beta = \frac{n \alpha}{\sum_{i=1}^n x_i} \]
The derivative with respect to \(\alpha\) requires using the derivative of \(\ln \Gamma(\alpha)\), which is \(\psi(\alpha) = \frac{d}{d\alpha} \ln \Gamma(\alpha)\), where \(\psi(\alpha)\) is the Digamma function. \[ \frac{\partial \ell}{\partial \alpha} = n \ln \beta - n \psi(\alpha) + \sum_{i=1}^n \ln x_i \]
Calculate Sample Moments:
Sample mean \(\hat{\mu} = \frac{1}{n} \sum_{i=1}^{n} \hat{\sigma}_i^{-2}\) Sample variance \(\hat{s}^2 = \frac{1}{n-1} \sum_{i=1}^{n} \left( \hat{\sigma}_i^{-2} - \hat{\mu} \right)^2\)
According to the expectation and variance of the Gamma distribution, we have: \[ \frac{d_{\text{prior}}}{\beta_{\text{prior}}} = \hat{\mu} \] \[ \frac{d_{\text{prior}}}{\beta_{\text{prior}}^2} = \hat{s}^2 \]
By solving these two equations, we obtain: \[ \beta_{\text{prior}} = \frac{\hat{\mu}}{\hat{s}^2} \] \[ d_{\text{prior}} = \frac{\hat{\mu}^2}{\hat{s}^2} \]